{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用Python从底层实现了一个全连接神经网络\n",
    "\n",
    "\n",
    "# 先对MNIST数据集进行读入以及处理\n",
    "# 数据从MNIST官网直接下载\n",
    "\n",
    "import numpy as np\n",
    "from struct import unpack\n",
    "\n",
    "def read_image(path):\n",
    "    with open(path, 'rb') as f:\n",
    "        magic, num, rows, cols = unpack('>4I',f.read(16))\n",
    "        img = np.fromfile(f, dtype=np.uint8).reshape(num, 784, 1)  # 在这里可以调整图片读入格式\n",
    "    return img\n",
    "\n",
    "def read_label(path):\n",
    "    with open(path, 'rb') as f:\n",
    "        magic, num = unpack('>2I',f.read(8))\n",
    "        label = np.fromfile(f, dtype=np.uint8)\n",
    "    return label\n",
    "\n",
    "def normalize_image(image):\n",
    "    img = image.astype(np.float32)/255.0\n",
    "    return img\n",
    "\n",
    "def one_hot_label(label):\n",
    "    lab = np.zeros((label.size, 10))\n",
    "    for i, row in enumerate(lab):\n",
    "        row[label[i]] = 1\n",
    "    return lab\n",
    "\n",
    "\n",
    "\n",
    "# 加载数据集以及数据预处理\n",
    "\n",
    "def dataset_loader():\n",
    "    train_image = read_image(r'C:\\Users\\95410\\Downloads\\数据集\\MNIST\\train-images.idx3-ubyte')\n",
    "    train_label = read_label(r'C:\\Users\\95410\\Downloads\\数据集\\MNIST\\train-labels.idx1-ubyte')\n",
    "    test_image = read_image(r'C:\\Users\\95410\\Downloads\\数据集\\MNIST\\t10k-images.idx3-ubyte')\n",
    "    test_label = read_label(r'C:\\Users\\95410\\Downloads\\数据集\\MNIST\\t10k-labels.idx1-ubyte')\n",
    "\n",
    "    train_image = normalize_image(train_image)\n",
    "    train_label = one_hot_label(train_label)\n",
    "    train_label = train_label.reshape(train_label.shape[0],train_label.shape[1],1)\n",
    "\n",
    "    test_image = normalize_image(test_image)\n",
    "    test_label = one_hot_label(test_label)\n",
    "    test_label = test_label.reshape(test_label.shape[0],test_label.shape[1],1)\n",
    "    \n",
    "    return train_image, train_label, test_image, test_label\n",
    "\n",
    "train_image, train_label, test_image, test_label = dataset_loader()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 编写神经网络类\n",
    "class NetWork(object):\n",
    "    \n",
    "    def __init__(self, sizes):\n",
    "        '''\n",
    "        初始化神经网络，给每层的权重和偏置赋初值\n",
    "        权重为一个列表，列表中每个值是一个二维n×m的numpy数组\n",
    "        偏置为一个列表，列表中每个值是一个二维n×1的numpy数组'''\n",
    "        self.num_layers = len(sizes)\n",
    "        self.sizes = sizes\n",
    "        self.weights = [np.random.randn(n,m) for m,n in zip(sizes[:-1], sizes[1:])]   # 一定得用rnadn而不是random\n",
    "        self.biases = [np.random.randn(n,1) for n in sizes[1:]]\n",
    "        \n",
    "    def sigmoid(self, z):\n",
    "        '''sigmoid激活函数'''\n",
    "        a = 1.0 / (1.0 + np.exp(-z))\n",
    "        return a\n",
    "    \n",
    "    def sigmoid_prime(self, z):\n",
    "        '''sigmoid函数的一阶导数'''\n",
    "        return self.sigmoid(z) * (1 - self.sigmoid(z))\n",
    "    \n",
    "    \n",
    "    def feed_forward(self, x):\n",
    "        '''完成前向传播过程，由输入值计算神经网络最终的输出值\n",
    "        输入为一个列向量，输出也为一个列向量'''\n",
    "        value = x\n",
    "        for i in range(len(self.weights)):\n",
    "            value = self.sigmoid(np.dot(self.weights[i], value) + self.biases[i])\n",
    "        y = value\n",
    "        return y\n",
    "    \n",
    "    def evaluate(self, images, labels):\n",
    "        result = 0\n",
    "        for img, lab in zip(images, labels):\n",
    "            predict_label = self.feed_forward(img)\n",
    "            if np.argmax(predict_label) == np.argmax(lab):\n",
    "                result += 1\n",
    "        return result\n",
    "        \n",
    "    \n",
    "    def SGD(self, train_image, train_label, epochs, mini_batch_size, eta):\n",
    "        '''Stochastic gradiend descent随机梯度下降法，将训练数据分多个batch\n",
    "        一次使用一个mini_batch_size的数据，调用update_mini_batch函数更新参数'''\n",
    "        for j in range(epochs):\n",
    "            mini_batches_image = [train_image[k:k+mini_batch_size] for k in range(0, len(train_image), mini_batch_size)]\n",
    "            mini_batches_label = [train_label[k:k+mini_batch_size] for k in range(0, len(train_label), mini_batch_size)]\n",
    "            for mini_batch_image, mini_batch_label in zip(mini_batches_image, mini_batches_label):\n",
    "                self.update_mini_batch(mini_batch_image, mini_batch_label, eta, mini_batch_size)\n",
    "            print(\"Epoch{0}: accuracy is {1}/{2}\".format(j+1, self.evaluate(test_image, test_label), len(test_image)))\n",
    "                \n",
    "    def update_mini_batch(self, mini_batch_image, mini_batch_label, eta, mini_batch_size):\n",
    "        '''通过一个batch的数据对神经网络参数进行更新\n",
    "        需要对当前batch中每张图片调用backprop函数将误差反向传播\n",
    "        求每张图片对应的权重梯度以及偏置梯度，最后进行平均使用梯度下降法更新参数'''\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "        for x,y in zip(mini_batch_image, mini_batch_label):\n",
    "            delta_nabla_b, delta_nabla_w = self.backprop(x, y)\n",
    "            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]\n",
    "            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]\n",
    "        self.weights = [w-(eta/mini_batch_size)*nw for w, nw in zip(self.weights, nabla_w)]\n",
    "        self.biases = [b-(eta/mini_batch_size)*nb for b, nb in zip(self.biases, nabla_b)]\n",
    "    \n",
    "    def backprop(self, x, y):\n",
    "        '''计算通过单幅图像求得的每层权重和偏置的梯度'''\n",
    "        delta_nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        delta_nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "        \n",
    "        # 前向传播，计算各层的激活前的输出值以及激活之后的输出值，为下一步反向传播计算作准备\n",
    "        activations = [x]\n",
    "        zs = []\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            z = np.dot(w, activations[-1]) + b\n",
    "            zs.append(z)\n",
    "            activation = self.sigmoid(z)\n",
    "            activations.append(activation)\n",
    "        \n",
    "        # 先求最后一层的delta误差以及b和W的导数\n",
    "        cost = activations[-1] - y    \n",
    "        delta = cost * self.sigmoid_prime(zs[-1])\n",
    "        delta_nabla_b[-1] = delta\n",
    "        delta_nabla_w[-1] = np.dot(delta, activations[-2].transpose())\n",
    "        # 将delta误差反向传播以及各层b和W的导数，一直计算到第二层\n",
    "        for l in range(2, self.num_layers):\n",
    "            delta = np.dot(self.weights[-l+1].transpose(), delta) * self.sigmoid_prime(zs[-l])\n",
    "            delta_nabla_b[-l] = delta\n",
    "            delta_nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())\n",
    "        return delta_nabla_b, delta_nabla_w\n",
    "        \n",
    "            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch1: accuracy is 8194/10000\n",
      "Epoch2: accuracy is 8371/10000\n",
      "Epoch3: accuracy is 8454/10000\n",
      "Epoch4: accuracy is 8458/10000\n",
      "Epoch5: accuracy is 8513/10000\n",
      "Epoch6: accuracy is 8537/10000\n",
      "Epoch7: accuracy is 8543/10000\n",
      "Epoch8: accuracy is 8540/10000\n",
      "Epoch9: accuracy is 8586/10000\n",
      "Epoch10: accuracy is 9461/10000\n",
      "Epoch11: accuracy is 9452/10000\n",
      "Epoch12: accuracy is 9450/10000\n",
      "Epoch13: accuracy is 9460/10000\n",
      "Epoch14: accuracy is 9477/10000\n",
      "Epoch15: accuracy is 9466/10000\n",
      "Epoch16: accuracy is 9494/10000\n",
      "Epoch17: accuracy is 9461/10000\n",
      "Epoch18: accuracy is 9459/10000\n",
      "Epoch19: accuracy is 9476/10000\n",
      "Epoch20: accuracy is 9452/10000\n",
      "Epoch21: accuracy is 9474/10000\n",
      "Epoch22: accuracy is 9486/10000\n",
      "Epoch23: accuracy is 9457/10000\n",
      "Epoch24: accuracy is 9457/10000\n",
      "Epoch25: accuracy is 9480/10000\n",
      "Epoch26: accuracy is 9467/10000\n",
      "Epoch27: accuracy is 9477/10000\n",
      "Epoch28: accuracy is 9484/10000\n",
      "Epoch29: accuracy is 9490/10000\n",
      "Epoch30: accuracy is 9485/10000\n"
     ]
    }
   ],
   "source": [
    "# 训练神经网络\n",
    "\n",
    "net_trained = NetWork([784, 30, 10])\n",
    "net_trained.SGD(train_image, train_label, 30, 10, 3)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
